from scipy import stats
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


'''
This module creates the datasets used in the two main files.

'''

#%% Files 
# Input locations read with ``pd.read_csv`` by ``MakeUniverse.make_financials``.
# Each value is a placeholder: point it at the corresponding file.

# Add COMPUSTAT here
compustat_data = 'local_data/'

# Add TruCost here
trucost_data = 'local_data/'

# Add Refinitiv fundamental data here
refinitiv_fundamentals = 'local_data/'
#%%
class MakeUniverse:
    """Build the data frames used by the analysis scripts.

    Every method reads its inputs from CSV files (the module-level path
    constants or files under ``local_data/``) and returns pandas objects;
    the class itself keeps no state.
    """

    @staticmethod
    def _merge_key(frame, year_col):
        """Return the ``'<gvkey>-<year>'`` string key for each row of *frame*.

        Both ``gvkey`` and *year_col* are cast to ``int`` first, so a missing
        value in either column raises.
        """
        firm = frame['gvkey'].astype(int).astype(str)
        year = frame[year_col].astype(int).astype(str)
        return firm + '-' + year

    #===========
    def make_financials(self):
        """Assemble the firm-year financial panel and firm-level dummies.

        Returns
        -------
        compustat : DataFrame
            COMPUSTAT rows sorted by ``gvkey``/``fyear`` with the added
            columns ``at_lagged``, ``Tangibility`` and ``Profitability``,
            merged with the TruCost region and with ``MarketLeverage``, and
            restricted to ``0 <= MarketLeverage <= 1``.
        regions : DataFrame
            One row per ``gvkey`` with ``SP_GEOGRAPHY_*`` dummies;
            ``SP_GEOGRAPHY_Middle East`` is dropped (omitted category).
        sector : DataFrame
            One row per ``gvkey`` with ``GICS_level_1_*`` dummies
            (first category dropped).
        tc : DataFrame
            The TruCost file as read.
        """
        compustat = pd.read_csv(compustat_data, low_memory=False)
        compustat = compustat.sort_values(by=['gvkey', 'fyear'])

        # Previous row's total assets within each firm (rows are sorted by
        # fiscal year above, so this is the prior observation of the firm).
        compustat['at_lagged'] = compustat[['at_usd', 'gvkey']].groupby('gvkey').shift(1)
        compustat['Tangibility'] = compustat['ppent_usd'] / compustat['at_usd']
        compustat['Profitability'] = compustat['ebitda_usd'] / compustat['at_lagged']

        # Attach the last-listed TruCost region of every firm.
        # No ``on=`` is given: pandas joins on every column name the two
        # frames share, and the default inner join drops unmatched firms.
        tc = pd.read_csv(trucost_data)
        last_region = tc[['SP_GEOGRAPHY', 'gvkey']].groupby('gvkey').last()
        compustat = compustat.merge(last_region.reset_index())

        # Firm-level sector dummies (first category is the reference).
        sector = compustat[['gvkey', 'GICS_level_1']].groupby('gvkey').last()
        sector = pd.get_dummies(sector, drop_first=True)

        # Firm-level region dummies with "Middle East" as the reference.
        regions = compustat[['SP_GEOGRAPHY', 'gvkey']].groupby('gvkey').last()
        regions = pd.get_dummies(regions, drop_first=False)
        regions = regions.drop(columns=['SP_GEOGRAPHY_Middle East'])

        #=== Merge with Market Leverage
        # Joined on the shared column names -- presumably ``mrg`` is already
        # a column of the COMPUSTAT extract; confirm against the data.
        ML = pd.read_csv(refinitiv_fundamentals)
        compustat = compustat.merge(ML[['mrg', 'MarketLeverage']])
        # Keep leverage ratios inside [0, 1]; rows with NaN are dropped too.
        compustat = compustat[compustat.MarketLeverage.between(0, 1)]
        return compustat, regions, sector, tc

    def make_paris(self, ProjectionYear, target_type='below'):
        """Return the alignment frame for one projection year.

        Parameters
        ----------
        ProjectionYear : int
            Value of ``cyear`` to keep.
        target_type : str
            Column whose sign defines alignment (``'below'`` or
            ``'well_below'`` are the columns used elsewhere in this class).

        Returns
        -------
        paris : DataFrame
            Alignment data with a binary ``alignment`` column (1 where
            *target_type* is negative, 0 where it is positive or zero, NaN
            where it is missing) and one dummy column per base year.
        base_dummies : list
            Labels of the base-year dummy columns that were appended.

        Raises
        ------
        KeyError
            If no row has ``base_year == 2014``; that year is the omitted
            reference category and is dropped unconditionally.
        """
        _, paris, _ = self.make_universe_targets_2020(ProjectionYear, target_type)

        # Overwrite the string labels with a 0/1 indicator:
        # sign -1 -> 1 (aligned), sign +1 -> 0 (misaligned), sign 0 stays 0.
        paris['alignment'] = np.sign(paris[target_type])
        paris['alignment'] = paris['alignment'].replace([-1, 1], [1, 0])
        paris = paris[paris.cyear == ProjectionYear]

        base_dummies = pd.get_dummies(paris.base_year)
        base_dummies = base_dummies.drop(columns=[2014])
        paris = pd.concat((paris, base_dummies), axis=1)
        return paris, list(base_dummies)

    def make_other_esg(self, st, version_=''):
        """Load the Refinitiv ESG indicators and the internal-target data.

        Parameters
        ----------
        st : DataFrame
            Must contain ``gvkey`` and ``ISIN``; used to attach ``gvkey`` to
            the CDP target file.
        version_ : str
            Suffix inserted before ``.csv`` in the ESG file name.

        Returns
        -------
        ref : DataFrame
            ESG rows with no missing value in any of ``vars_``, booleans
            recoded to 1/0, merged with ``Is_target`` from the Refinitiv
            target file.
        vars_ : list of str
            Names of the ESG indicator columns.
        cdp_t : DataFrame
            CDP targets merged with the firm identifiers, plus ``mrg``.
        ref_t : DataFrame
            Refinitiv target data, plus ``mrg``.
        """
        ref = pd.read_csv('local_data/refinitiv_esg_data_mixed'+version_+'.csv')
        ref = ref.replace([True, False], [1, 0])
        vars_ = ['supplier_training',
                 'esg_exec_comp',
                 'environmental_supchain_policy',
                 'environmental_partnerships',
                 'env_material_sourcing',
                 'env_products',
                 'EDI_target',
                 'policy_board',
                 'stakeholder_eng',
                 ]
        # Keep complete cases only. ``reset_index()`` (without drop) also
        # stores the old row labels in a new ``index`` column.
        ref = ref.loc[ref[vars_].dropna().index].reset_index()
        ref['mrg'] = self._merge_key(ref, 'fyear')

        #==== CDP internal targets data
        cdp_t = pd.read_csv('local_data/CDP_minimal_target.csv')
        gvkey_org = st[['gvkey', 'ISIN']].groupby('gvkey').last()
        # Joined on whatever column names the two frames share.
        cdp_t = cdp_t.merge(gvkey_org.reset_index())
        cdp_t['mrg'] = self._merge_key(cdp_t, 'Year')

        #==== Refinitiv internal targets data
        ref_t = pd.read_csv('local_data/refinitiv_target_emissions_data.csv')
        ref_t['mrg'] = self._merge_key(ref_t, 'fyear')

        ref = ref.merge(ref_t[['mrg', 'Is_target']])
        return ref, vars_, cdp_t, ref_t

    def make_universe_targets_2020(self, year=2019, target_type='well_below'):
        """Combine the alignment files for one year with the target data.

        Parameters
        ----------
        year : int
            Value of ``cyear`` to keep.
        target_type : str
            Column whose sign is stored in ``signed`` and labelled in
            ``alignment``.

        Returns
        -------
        dt : DataFrame
            Refinitiv target rows inner-merged on ``mrg`` with the alignment
            columns.
        un : DataFrame
            One row per ``mrg`` (last occurrence) for the requested year,
            with ``signed`` and a string ``alignment`` column
            (``'Aligned'`` for a negative value, ``'Misaligned'`` for a
            positive one).
        target_ : DataFrame
            The Refinitiv target file, with ``mrg`` added if it was absent.
        """
        #== Get the TruCost Alignment Data
        # Only the 2018 rows are taken from the first file; the second file
        # is used in full.
        unA = pd.read_csv('local_data/ParisAlignment.csv')
        unA = unA[unA.cyear == 2018]
        unB = pd.read_csv('local_data/ParisAlignment_2020.csv')
        un = pd.concat((unA, unB))
        un = un.drop(columns=['SP_COMPANY_STATUS'])
        un = un[un.cyear == year]

        un['mrg'] = self._merge_key(un, 'cyear')
        un = un.groupby('mrg').last().reset_index()

        #== Get alignment and misalignment groups [choose the type of target]
        un['signed'] = np.sign(un[target_type])
        un['alignment'] = un['signed'].replace([-1, 1], ['Aligned', 'Misaligned'])

        #== Get Refinitiv Target Data
        target_ = pd.read_csv('local_data/refinitiv_target_emissions_data.csv')
        # make_other_esg derives ``mrg`` for this same file from
        # gvkey/fyear; do the same here when the CSV does not ship the key,
        # so the merge below cannot fail on a missing column.
        if 'mrg' not in target_.columns:
            target_['mrg'] = self._merge_key(target_, 'fyear')

        #==
        alignment_cols = ['mrg', 'GICS_level_1', 'base_year', 'horizon_year',
                          'below', 'well_below', 'signed', 'alignment', 'cyear']
        dt = target_.merge(un[alignment_cols], on='mrg')
        return dt, un, target_

    def get_paris_for_analysis(self, target_type):
        """Return the alignment frames for 2018, 2019 and 2020.

        Returns
        -------
        tuple
            ``(MAX_ProjectionYear, paris2018, paris2019, paris2020,
            base_dummies)`` where ``MAX_ProjectionYear`` is 2020 and
            ``base_dummies`` lists every base-year dummy label that occurs
            in at least one of the three frames (set order, not sorted).
        """
        MAX_ProjectionYear = 2020
        yearly = [self.make_paris(projection_year, target_type=target_type)
                  for projection_year in (2018, 2019, 2020)]
        (paris2018, dummies2018), (paris2019, dummies2019), (paris2020, dummies2020) = yearly
        base_dummies = list(set(dummies2018) | set(dummies2019) | set(dummies2020))

        return MAX_ProjectionYear, paris2018, paris2019, paris2020, base_dummies
